# Clear environment: removes every non-hidden object from the current
# workspace before the script starts.
# NOTE(review): this does not detach packages or reset options, so it is not
# a substitute for running the script in a fresh R session.
rm(list = ls())

# Set working directory. All input (DATA/for_R/...) and output (FIG/...,
# TAB/...) paths used below are relative to this folder.
# NOTE(review): the path is machine-specific; point it at your own copy of
# the replication folder before running.
setwd("/Users/georgemelios/Downloads/PSRM_Replication 3")

# Load required libraries
library(ggplot2)
library(dplyr)
library(readr)
library(openxlsx)
library(tibble)
library(haven)
library(tidyr)
library(purrr)
library(tidyverse)
library(kableExtra)

###### Table 1 ######

# Read the summary-statistics data set exported for R
summ <- read_dta("DATA/for_R/summary_stats.dta")

# Display names for the raw variable names; a variable that is not listed
# here keeps its original name
table_1_labels <- c(
  NonBinary = "Non-Binary",
  age = "Age",
  degree = "With degree",
  diet_short1 = "Standard diet",
  diet_short2 = "Plant-based diet",
  ethnic_short1 = "White",
  ethnic_short2 = "Black",
  ethnic_short3 = "Asian",
  ethnic_short4 = "Mixed",
  ethnic_short5 = "Other",
  no_degree = "Without degree",
  party_id1 = "Labour",
  party_id2 = "Tory",
  party_id3 = "Neither",
  right_left1 = "Right-wing",
  right_left2 = "Left-wing",
  right_left3 = "Center"
)

# Row order of the table; names that are not listed sort to the end
table_1_order <- c(
  "Male", "Female", "Non-Binary", "Age",
  "With degree", "Without degree",
  "White", "Black", "Asian", "Mixed", "Other",
  "Standard diet", "Plant-based diet",
  "Labour", "Tory", "Neither",
  "Right-wing", "Center", "Left-wing"
)

# Stack every column into (Variable, Value) pairs, summarise each variable,
# then relabel and order the rows
df_summary <- summ %>%
  pivot_longer(
    cols = everything(),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  group_by(Variable) %>%
  summarise(
    Total_Observations = n(),
    Count = sum(Value, na.rm = TRUE),
    Min = min(Value, na.rm = TRUE),
    Max = max(Value, na.rm = TRUE),
    Proportion = round(mean(Value, na.rm = TRUE), 3),
    Std_Deviation = round(sd(Value, na.rm = TRUE), 3)
  ) %>%
  ungroup() %>%
  mutate(Variable = coalesce(unname(table_1_labels[Variable]), Variable)) %>%
  arrange(match(Variable, table_1_order))

# Write to Excel (this creates the workbook that the later tables are added to)
write.xlsx(
  df_summary,
  file = "TAB/tables.xlsx",
  sheetName = "Table 1",
  rowNames = FALSE
)

###### Figure 1 ######

# Read the sample sizes (reused by the tables further down) and the main
# results
observations <- read.table("DATA/for_R/observations.txt")
main_results <- read.table("DATA/for_R/main_results.txt")

# Drop the rows whose estimate is exactly zero, then label what is left.
# The label vectors are attached by position, so they rely on exactly eight
# rows remaining, in the order in which they appear in the file.
filtered_results <- main_results %>%
  filter(Est. != 0) %>%
  mutate(
    Attribute = c(
      "In-party", "Tolerant", "Progressive", "White",
      "Degree", "Vegetarian", "Attractive", "Tall"
    ),
    Baseline = c(
      "Out-party", "Intolerant", "Traditional", "Black",
      "No degree", "Non-vegetarian", "Unattractive", "Short"
    ),
    group = case_when(
      Attribute %in% c("In-party", "Tolerant") ~ "Political",
      Attribute %in% c("Attractive", "Tall") ~ "Non-political",
      TRUE ~ "Politically-correlated"
    ),
    across(c(Est., LCI, UCI, SE), as.numeric),
    # Levels written out in reversed order (Progressive ... Tall, reversed)
    Attribute = factor(
      Attribute,
      levels = c(
        "Tall", "Attractive", "Tolerant", "In-party",
        "Vegetarian", "Degree", "White", "Progressive"
      )
    ),
    group = factor(
      group,
      levels = c("Political", "Politically-correlated", "Non-political")
    )
  )

# Point estimates with confidence intervals, one facet per attribute group
figure_1 <- ggplot(
  filtered_results,
  aes(x = Attribute, y = Est., ymin = LCI, ymax = UCI)
) +
  geom_pointrange(size = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  facet_grid(group ~ ., scales = "free_y", space = "free_y") +
  coord_flip() +
  labs(x = "Attribute", y = "AMCE") +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold"),
    strip.background = element_rect(fill = "grey90", colour = "black", size = 1),
    panel.spacing = unit(2, "lines"),
    axis.text.y = element_text(face = "bold", size = 12)
  )

# Save Figure
ggsave("FIG/figure_1.png", plot = figure_1, width = 8, height = 6, dpi = 300)

###### Table A2 ######

# Keep the table columns and round the numeric ones to three decimals
table_a2_numeric <- c("SE", "Est.", "LCI", "UCI")
filtered_results <- filtered_results[, c("Attribute", "Baseline", "Est.", "SE", "LCI", "UCI")]
filtered_results[table_a2_numeric] <- lapply(
  filtered_results[table_a2_numeric],
  round,
  digits = 3
)

# Footer rows: the two counts from row "r1" of `observations` go into the
# Baseline column, the remaining cells are left blank
table_a2_footer <- data.frame(
  Attribute = c("Number of Observations", "Number of Respondents"),
  Baseline = c(observations["r1", "c1"], observations["r1", "c2"]),
  Est. = "",
  SE = "",
  LCI = "",
  UCI = ""
)
filtered_results <- rbind(filtered_results, table_a2_footer)

# Re-open the workbook written for Table 1; the remaining tables are added to
# `wb` as further sheets
wb <- loadWorkbook("TAB/tables.xlsx")
addWorksheet(wb, sheetName = "Table A2")
writeData(wb, sheet = "Table A2", x = filtered_results)

###### Table A4 ######

# Load and clean in one pass: drop the rows with a zero estimate, label the
# remaining rows by position (eight labels, so eight rows are expected), keep
# the table columns, then convert to numeric and round to three decimals
match_results <- read.table("DATA/for_R/match_results_short.txt") %>%
  filter(Est. != 0) %>%
  mutate(
    Attribute = c(
      "Partisanship Match", "Tolerance Match", "Ideology Match",
      "Race Match", "Education Match", "Diet Match",
      "Attractiveness Match", "Height Match"
    )
  ) %>%
  select(Attribute, Est., SE, LCI, UCI) %>%
  mutate(across(c(Est., LCI, UCI, SE), function(x) round(as.numeric(x), 3)))

# Footer rows: the two counts from row "r2" of `observations` go into the
# Est. column, the remaining cells are left blank
table_a4_footer <- data.frame(
  Attribute = c("Number of Observations", "Number of Respondents"),
  Est. = c(observations["r2", "c1"], observations["r2", "c2"]),
  SE = "",
  LCI = "",
  UCI = ""
)
match_results <- rbind(match_results, table_a4_footer)

# Export to Excel
addWorksheet(wb, sheetName = "Table A4")
writeData(wb, sheet = "Table A4", x = match_results)

###### Figure 2 ######

# Load data
mr_l <- read.table("DATA/for_R/match_results_long.txt")

# Clean data
# Take the labels from the row names once, straight after reading, so they do
# not depend on a dplyr verb preserving row names.
mr_l$Attribute <- rownames(mr_l)

mr_l <- mr_l %>%
  mutate(
    # Rows with a zero estimate get a missing Est.; bold_labels() and the
    # plot below treat rows with a missing Est. specially
    Est. = ifelse(Est. == 0, NA, Est.),
    # The "No_match" rows are relabelled from their full row name ...
    Attribute = case_when(
      Attribute == "cj_matchbeau2:No_match" ~ "Attractiveness Match",
      Attribute == "cj_matchhei2:No_match" ~ "Height Match",
      Attribute == "cj_matchpid2:No_match" ~ "Partisanship Match",
      Attribute == "cj_matchaff2:No_match" ~ "Tolerance Match",
      Attribute == "cj_matchid2:No_match" ~ "Ideology Match",
      Attribute == "cj_matchrace2:No_match" ~ "Race Match",
      Attribute == "cj_matched2:No_match" ~ "Education Match",
      Attribute == "cj_matchdiet2:No_match" ~ "Diet Match",
      TRUE ~ Attribute
    ),
    # ... every other row keeps only the part after the last ":"
    Attribute = sub(".*:", "", Attribute),
    Attribute = case_when(
      Attribute == "No_degree" ~ "No degree",
      Attribute == "Non_veg" ~ "Non-vegetarian",
      Attribute == "Veg" ~ "Vegetarian",
      TRUE ~ Attribute
    ),
    across(c(Est., LCI, UCI, SE), as.numeric),
    # Reverse the order of first appearance for the flipped axis
    Attribute = factor(Attribute, levels = rev(unique(Attribute))),
    group = factor(
      case_when(
        Attribute %in% c("Attractiveness Match", "Low", "High", "Height Match", "Short", "Tall") ~ "Non-political",
        Attribute %in% c("Partisanship Match", "Tory", "Labour", "Tolerance Match", "Intolerant", "Tolerant") ~ "Political",
        TRUE ~ "Politically-correlated"
      ),
      levels = c("Political", "Politically-correlated", "Non-political")
    )
  )

# Plot
# Axis-label formatter for Figure 2.
#
# Args:
#   breaks: character vector of axis breaks.
#
# Returns:
#   The result of sapply() over `breaks`: a label that belongs to a row of
#   the global `mr_l` with a missing Est. is returned as the plotmath
#   expression bold("<label>"); every other label is returned unchanged.
bold_labels <- function(breaks) {
  # Labels of the rows without an estimate
  header_labels <- mr_l$Attribute[is.na(mr_l$Est.)]
  sapply(breaks, function(lbl) {
    if (lbl %in% header_labels) {
      # bquote() inserts the label as a string constant, so quotes or
      # backslashes inside a label cannot break the expression (pasting the
      # label into code for parse() would fail on them)
      as.expression(bquote(bold(.(lbl))))
    } else {
      lbl
    }
  })
}

# Point estimates with confidence intervals, one facet per attribute group.
# Colour is mapped to is.na(Est.): rows with an estimate are drawn in black,
# rows without one are mapped to "transparent".
figure_2 <- ggplot(mr_l, aes(x = Attribute, y = Est., ymin = LCI, ymax = UCI)) +
  geom_pointrange(aes(color = is.na(Est.)), na.rm = TRUE) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  # guide = "none" hides the colour legend (guide = FALSE is deprecated)
  scale_color_manual(values = c("black", "transparent"), guide = "none") +
  coord_flip() +
  labs(x = "Attribute", y = "AMCE") +
  facet_grid(group ~ ., scales = "free_y", space = "free_y") +
  theme_minimal() +
  theme(
    axis.text.y = element_text(face = "bold", size = 12),
    panel.spacing = unit(2, "lines"),              # Increase spacing between facets
    strip.background = element_rect(fill = "grey90", colour = "black", size = 1), # Stronger border and shading for facet labels
    strip.text = element_text(face = "bold", size = 13)       # Bold facet labels
  ) +
  # Axis labels are formatted by bold_labels()
  scale_x_discrete(labels = bold_labels)

ggsave("FIG/figure_2.png", plot = figure_2, width = 10, height = 7, units = "in")

###### Table A5 ######

# Same rows as Figure 2: keep the table columns and round the numeric ones to
# three decimals
table_a5_numeric <- c("SE", "Est.", "LCI", "UCI")
mr_l2 <- mr_l[, c("Attribute", "Est.", "SE", "LCI", "UCI")]
mr_l2[table_a5_numeric] <- lapply(mr_l2[table_a5_numeric], round, digits = 3)

# Footer rows: the two counts from row "r3" of `observations` go into the
# Est. column, the remaining cells are left blank
table_a5_footer <- data.frame(
  Attribute = c("Number of Observations", "Number of Respondents"),
  Est. = c(observations["r3", "c1"], observations["r3", "c2"]),
  SE = "",
  LCI = "",
  UCI = ""
)
mr_l2 <- rbind(mr_l2, table_a5_footer)

# Export to Excel
addWorksheet(wb, sheetName = "Table A5")
writeData(wb, sheet = "Table A5", x = mr_l2)

###### Table A6 ######

match_tp <- read.table("DATA/for_R/match_tolerance_party.txt")

# Rows to report and, in the same order, the label each one gets in the table
rows_to_keep <- c("cj_matchaff3:In_party_Intolerant", "cj_matchaff3:In_party_Tolerant", "cj_matchaff3:Out_party_Intolerant", "cj_matchaff3:Out_party_Tolerant")
Attribute <- c("In-party:Intolerant", "In-party:Tolerant", "Out-party:Intolerant", "Out-party:Tolerant")

# Stop early if a requested row is missing rather than writing a partial table
stopifnot(all(rows_to_keep %in% rownames(match_tp)))
match_tp <- match_tp[rownames(match_tp) %in% rows_to_keep, ]

# Attach each label through its row name. The filter above keeps the rows in
# file order, so assigning the labels by position would only be correct if
# the file happened to list the rows in the same order as rows_to_keep.
match_tp$Attribute <- Attribute[match(rownames(match_tp), rows_to_keep)]

match_tp <- match_tp %>%
  mutate(across(c(Est., LCI, UCI, SE), as.numeric))

# Reorder columns and round to three decimals
match_tp <- match_tp[, c("Attribute", "Est.", "SE", "LCI", "UCI")]
match_tp[, c("SE", "Est.", "LCI", "UCI")] <- round(match_tp[, c("SE", "Est.", "LCI", "UCI")], 3)

# Add N (row "r4" of `observations`) and export to Excel
match_tp <- rbind(
  match_tp,
  data.frame(Attribute = "Number of Observations", Est. = observations["r4", "c1"],  SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Respondents", Est. = observations["r4", "c2"],  SE = "", LCI = "", UCI = "")
)

addWorksheet(wb, "Table A6")
writeData(wb, "Table A6", match_tp)

###### Figure 3 ######

# Read the results for the male and the female sample
male <- read.table(file = "DATA/for_R/male_results.txt")
female <- read.table(file = "DATA/for_R/female_results.txt")

# Function to process the datasets
#
# Cleans one table of estimates as returned by read.table().
#
# Args:
#   df: data frame with columns Est., SE, LCI and UCI; its row names are
#       used to build the attribute labels.
#
# Returns:
#   A data frame with numeric columns Est., SE, LCI and UCI plus a character
#   column Attribute holding the part of each row name after the last ":".
#   Rows whose Est. equals zero are removed; rows with a missing Est. are
#   kept unchanged.
process_data <- function(df) {
  value_cols <- c("Est.", "SE", "LCI", "UCI")
  # Keep only required columns
  df <- df[, value_cols]
  # Convert to numeric before filtering so the zero test below is a numeric
  # comparison. Factors go through as.character() first, because
  # as.numeric() on a factor returns the level codes, not the values.
  df[value_cols] <- lapply(df[value_cols], function(column) {
    if (is.factor(column)) {
      column <- as.character(column)
    }
    as.numeric(column)
  })
  # Drop rows where Est. = 0. The is.na() guard keeps a missing estimate from
  # becoming an NA row index, which would yield an all-NA row.
  df <- df[is.na(df$Est.) | df$Est. != 0, ]
  # Create 'Attribute' from the row names, dropping everything up to the
  # last ":"
  df$Attribute <- sub(".*:", "", rownames(df))
  df
}

# Clean each sample with process_data() and tag it with its gender
male_processed <- process_data(male)
male_processed$Gender <- "Male"
female_processed <- process_data(female)
female_processed$Gender <- "Female"

# Stack the two samples (male rows first), relabel "Veg", assign each
# attribute to its facet group and fix the axis order (reverse order of first
# appearance)
combined_df <- rbind(male_processed, female_processed) %>%
  mutate(
    Attribute = if_else(Attribute == "Veg", "Vegetarian", Attribute),
    group = factor(
      case_when(
        Attribute %in% c("Attractive", "Tall") ~ "Non-political",
        Attribute %in% c("In_party", "Tolerant") ~ "Political",
        TRUE ~ "Politically-correlated"
      ),
      levels = c("Political", "Politically-correlated", "Non-political")
    ),
    Attribute = factor(Attribute, levels = rev(unique(Attribute)))
  )

# Plot: both genders in black, told apart by point shape
# (19 for Male, 1 for Female)
figure_3 <- ggplot(
  combined_df,
  aes(x = Attribute, y = Est., ymin = LCI, ymax = UCI)
) +
  geom_pointrange(
    aes(color = Gender, shape = Gender),
    position = position_dodge(width = 0),
    na.rm = TRUE
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c(Male = "black", Female = "black")) +
  scale_shape_manual(values = c(Male = 19, Female = 1)) +
  coord_flip() +
  facet_grid(group ~ ., scales = "free_y", space = "free_y") +
  labs(x = "Attribute", y = "AMCE", color = "Gender", shape = "Gender") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    strip.text = element_text(face = "bold"),
    strip.background = element_rect(fill = "grey90", colour = "black", size = 1),
    panel.spacing = unit(2, "lines"),
    axis.text.y = element_text(face = "bold", size = 12)
  )

# No width/height given, so ggsave() falls back to its default size
ggsave("FIG/figure_3.png", plot = figure_3, dpi = 300)

###### Table A7 ######

# Reuse the Figure 3 data (male rows first, then female rows) and keep only
# the table columns
combined_df <- combined_df[, c("Attribute", "Est.", "SE", "LCI", "UCI")]

# Baseline labels, attached by position: 16 entries, i.e. the same eight
# labels twice. data.frame() below requires this to fit the number of rows.
Baseline <- c("Out-party", "Intolerant", "Traditional", "Black", "Without degree", 
              "Non-vegetarian", "Unattractive", "Short", "Out-party", "Intolerant", 
              "Traditional", "Black", "Without degree", 
              "Non-vegetarian", "Unattractive", "Short")

# Insert Baseline as the second column, right after Attribute
combined_df <- data.frame(combined_df[, 1:1, drop = FALSE], 
                          Baseline = Baseline, 
                          combined_df[, (1+1):ncol(combined_df), drop = FALSE])

# Round the numeric columns to three decimals

combined_df[, c("SE", "Est.", "LCI", "UCI")] <- round(combined_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Add rows for male sample and female sample. Each .before index refers to
# the frame as it stands after the previous insertion, so the order of these
# calls matters: header at row 1, then (assuming eight male rows in 2-9) the
# male counts at rows 10-11 and the female header at row 12.
combined_df <- combined_df %>%
  add_row(Attribute = "MALE SAMPLE", .before = 1) %>%
  add_row(Attribute = "MALE: Number of Observations", Est. = observations["r5", "c1"], .before = 10) %>%
  add_row(Attribute = "MALE: Number of Respondents", Est. = observations["r5", "c2"], .before = 11) %>%
  add_row(Attribute = "FEMALE SAMPLE", .before = 12)

# Add rows for female observations at the end
combined_df <- combined_df %>%
  add_row(Attribute = "FEMALE: Number of Observations", Est. = observations["r6", "c1"]) %>%
  add_row(Attribute = "FEMALE: Number of Respondents", Est. = observations["r6", "c2"])

# Export to Excel
addWorksheet(wb, "Table A7")
writeData(wb, "Table A7", combined_df)

###### Figure 4 ######

# Read the results for each party-by-gender sample
male_labour <- read.table(file = "DATA/for_R/male_labour.txt")
female_labour <- read.table(file = "DATA/for_R/female_labour.txt")
male_tory <- read.table(file = "DATA/for_R/male_tory.txt")
female_tory <- read.table(file = "DATA/for_R/female_tory.txt")

# Function to process the dataset
#
# Cleans one table of estimates as returned by read.table().
#
# Args:
#   df: data frame with columns Est., SE, LCI and UCI; its row names are
#       used to build the attribute labels.
#
# Returns:
#   A data frame with numeric columns Est., SE, LCI and UCI plus a character
#   column Attribute holding the part of each row name after the last ":".
#   Rows whose Est. equals zero are removed; rows with a missing Est. are
#   kept unchanged.
process_data <- function(df) {
  value_cols <- c("Est.", "SE", "LCI", "UCI")
  # Keep only required columns
  df <- df[, value_cols]

  # Convert to numeric before filtering so the zero test below is a numeric
  # comparison. Factors go through as.character() first, because
  # as.numeric() on a factor returns the level codes, not the values.
  df[value_cols] <- lapply(df[value_cols], function(column) {
    if (is.factor(column)) {
      column <- as.character(column)
    }
    as.numeric(column)
  })

  # Drop rows where Est. = 0. The is.na() guard keeps a missing estimate from
  # becoming an NA row index, which would yield an all-NA row.
  df <- df[is.na(df$Est.) | df$Est. != 0, ]

  # Create 'Attribute' from the row names, dropping everything up to the
  # last ":"
  df$Attribute <- sub(".*:", "", rownames(df))

  df
}

# Clean each of the four samples and tag it with its gender and party
male_lab_proc <- process_data(male_labour)
male_lab_proc$Gender <- "Male"
male_lab_proc$Party <- "Labour"

female_lab_proc <- process_data(female_labour)
female_lab_proc$Gender <- "Female"
female_lab_proc$Party <- "Labour"

male_tor_proc <- process_data(male_tory)
male_tor_proc$Gender <- "Male"
male_tor_proc$Party <- "Tory"

female_tor_proc <- process_data(female_tory)
female_tor_proc$Gender <- "Female"
female_tor_proc$Party <- "Tory"

# Stack in the order Labour male, Labour female, Tory male, Tory female
combined_df_2 <- rbind(male_lab_proc, female_lab_proc, male_tor_proc, female_tor_proc)

# Row names to keep; the 1/2/3 suffixes are the ones rbind() appends to
# repeated row names
rows_to_keep <- c(
  "cj_matchpid:In_party", "cj_affect:Tolerant",
  "cj_matchpid:In_party1", "cj_affect:Tolerant1",
  "cj_matchpid:In_party2", "cj_affect:Tolerant2",
  "cj_matchpid:In_party3", "cj_affect:Tolerant3"
)

# Keep those rows and relabel the attribute codes "_1" and "High"
combined_df_2 <- combined_df_2[rownames(combined_df_2) %in% rows_to_keep, ] %>%
  mutate(
    Attribute = if_else(
      Attribute == "_1",
      "In-party",
      if_else(Attribute == "High", "Tolerant", Attribute)
    )
  )

# Plot: one panel per party, genders dodged side by side
figure_4 <- ggplot(
  combined_df_2,
  aes(x = Attribute, y = Est., ymin = LCI, ymax = UCI, shape = Gender)
) +
  geom_pointrange(
    aes(color = Gender),
    position = position_dodge(width = 0.5),
    size = 0.6
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  facet_wrap(~ Party, scales = "free_x") +
  scale_shape_manual(values = c(21, 16)) +  # Open circle for Female, filled for Male
  scale_color_manual(values = c("black", "black")) +
  scale_y_continuous(limits = c(0.05, NA)) +
  labs(x = "Attribute", y = "AMCE", shape = "Gender", color = "Gender") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    strip.text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Save the plot
ggsave("FIG/figure_4.png", plot = figure_4, width = 8, height = 6, dpi = 300)

###### Table A8 ######

# Read the match results for the male and the female sample
male_match <- read.table(file = "DATA/for_R/male_match.txt")
female_match <- read.table(file = "DATA/for_R/female_match.txt")

# Function to process the dataset
#
# Cleans one table of estimates as returned by read.table().
#
# Args:
#   df: data frame with columns Est., SE, LCI and UCI; its row names are
#       used to build the attribute labels.
#
# Returns:
#   A data frame with numeric columns Est., SE, LCI and UCI plus a character
#   column Attribute holding the part of each row name after the last ":".
#   Rows whose Est. equals zero are removed; rows with a missing Est. are
#   kept unchanged.
process_data <- function(df) {
  value_cols <- c("Est.", "SE", "LCI", "UCI")
  # Keep only required columns
  df <- df[, value_cols]
  # Convert to numeric before filtering so the zero test below is a numeric
  # comparison. Factors go through as.character() first, because
  # as.numeric() on a factor returns the level codes, not the values.
  df[value_cols] <- lapply(df[value_cols], function(column) {
    if (is.factor(column)) {
      column <- as.character(column)
    }
    as.numeric(column)
  })
  # Drop rows where Est. = 0. The is.na() guard keeps a missing estimate from
  # becoming an NA row index, which would yield an all-NA row.
  df <- df[is.na(df$Est.) | df$Est. != 0, ]
  # Create 'Attribute' from the row names, dropping everything up to the
  # last ":"
  df$Attribute <- sub(".*:", "", rownames(df))
  df
}

# Process both datasets
male_processed_match <- process_data(male_match)
female_processed_match <- process_data(female_match)

# Combine (male rows first, then female rows)
male_processed_match$Gender <- "Male"
female_processed_match$Gender <- "Female"
combined_df_match <- rbind(male_processed_match, female_processed_match)

# Clean combined: replace attribute codes with display labels
combined_df_match <- combined_df_match %>%
  mutate(Attribute = case_when(
    Attribute == "_1" ~ "In-party",
    Attribute == "Veg" ~ "Vegetarian",
    Attribute == "High" ~ "Tolerant",
    Attribute == "Profile_more_educated" ~ "More educated",
    Attribute == "Taller_profile" ~ "Taller",
    TRUE ~ Attribute
  ))

combined_df_match <- combined_df_match[, c("Attribute", "Est.", "SE", "LCI", "UCI")]

# Baseline labels, attached by position: 16 entries, i.e. the same eight
# labels twice. data.frame() below requires this to fit the number of rows.
Baseline <- c("Out-party", "Intolerant", "Traditional", "Black", "Less educated", 
              "Non-vegetarian", "Unattractive", "Shorter", "Out-party", "Intolerant", 
              "Traditional", "Black", "Less educated", 
              "Non-vegetarian", "Unattractive", "Shorter")

# Insert Baseline as the second column, right after Attribute
combined_df_match <- data.frame(combined_df_match[, 1:1, drop = FALSE], 
                          Baseline = Baseline, 
                          combined_df_match[, (1+1):ncol(combined_df_match), drop = FALSE])

combined_df_match[, c("SE", "Est.", "LCI", "UCI")] <- round(combined_df_match[, c("SE", "Est.", "LCI", "UCI")], 3)

# Add the sample headers and counts. Each .before index refers to the frame
# as it stands after the previous insertion, so the order of these calls
# matters. Within each pair the respondents row is inserted first and the
# observations row is then inserted in front of it.
combined_df_match <- combined_df_match %>%
  add_row(Attribute = "Male Sample", .before = 1) %>%
  add_row(Attribute = "Number of Respondents", Est. = observations["r7", "c2"], .before = 10) %>%
  add_row(Attribute = "Number of Observations", Est. = observations["r7", "c1"], .before = 10) %>%
  add_row(Attribute = "Female Sample", .before = 12) %>%
  add_row(Attribute = "Number of Respondents", Est. = observations["r8", "c2"], .before = 21) %>%
  add_row(Attribute = "Number of Observations", Est. = observations["r8", "c1"], .before = 21) 

# Export to excel
addWorksheet(wb, "Table A8")
writeData(wb, "Table A8", combined_df_match)

###### Table A9 ######

# Reuse the Figure 4 data (stacked as Labour male, Labour female, Tory male,
# Tory female) and keep only the table columns
combined_df_2 <- combined_df_2[, c("Attribute", "Est.", "SE", "LCI", "UCI")]

# Two baseline labels; data.frame() below repeats them down the rows
# (presumably two rows per sample, in this order -- confirm against the data)
Baseline <- c("Out-party", "Intolerant")

# Insert Baseline as the second column, right after Attribute
combined_df_2 <- data.frame(combined_df_2[, 1:1, drop = FALSE], 
                          Baseline = Baseline, 
                          combined_df_2[, (1+1):ncol(combined_df_2), drop = FALSE])

# Round the numeric columns to three decimals

combined_df_2[, c("SE", "Est.", "LCI", "UCI")] <- round(combined_df_2[, c("SE", "Est.", "LCI", "UCI")], 3)

# Add a header row and two count rows for each of the four samples. Each
# .before index refers to the frame as it stands after the previous
# insertion, so the order of these calls matters. Within each pair the
# respondents row is inserted first and the observations row is then inserted
# in front of it. Counts come from rows "r9" to "r12" of `observations`.
combined_df_2 <- combined_df_2 %>%
  add_row(Attribute = "Labour Male Sample", .before = 1) %>%
  add_row(Attribute = "Number of Respondents", Est. = observations["r9", "c2"], .before = 4) %>%
  add_row(Attribute = "Number of Observations", Est. = observations["r9", "c1"], .before = 4) %>%
  add_row(Attribute = "Labour Female Sample", .before = 6) %>%
  add_row(Attribute = "Number of Respondents", Est. = observations["r10", "c2"], .before = 9) %>%
  add_row(Attribute = "Number of Observations", Est. = observations["r10", "c1"], .before = 9) %>%
  add_row(Attribute = "Tory Male Sample", .before = 11) %>%
  add_row(Attribute = "Number of Respondents", Est. = observations["r11", "c2"], .before = 14) %>%
  add_row(Attribute = "Number of Observations", Est. = observations["r11", "c1"], .before = 14) %>%
  add_row(Attribute = "Tory Female Sample", .before = 16) %>%
  add_row(Attribute = "Number of Respondents", Est. = observations["r12", "c2"], .before = 19) %>%
  add_row(Attribute = "Number of Observations", Est. = observations["r12", "c1"], .before = 19) 

# Export to excel
addWorksheet(wb, "Table A9")
writeData(wb, "Table A9", combined_df_2)

###### Figure 7 ######

# Load data; labelled (haven) columns are converted to factors
stereo <- read_dta("DATA/for_R/stereotypes.dta")
stereo <- purrr::modify_if(stereo, is.labelled, as_factor)

# Clean data: for each attribute, the share (in percent) of each non-missing
# stereotype answer
data_summary <- stereo %>%
  filter(!is.na(stereotypes)) %>%
  group_by(attribute) %>%
  count(stereotypes) %>%
  mutate(Percentage = n/sum(n) * 100) %>%
  ungroup()

# Split neither categories: every "Neither" row is duplicated and each copy
# gets half of the percentage, so one half can be drawn on either side of zero
neither_split <- data_summary %>%
  filter(stereotypes == "Neither") %>%
  ungroup() %>%
  tidyr::uncount(2) %>%
  arrange(attribute) %>%
  mutate(Percentage = Percentage / 2) %>%
  mutate(stereotypes = case_when(row_number() %% 2 == 1 ~ "Neither1",
                                 TRUE ~ "Neither2")) # odd rows -> "Neither1", even rows -> "Neither2"

# Combine data and flip the signs: "Labour" and "Neither1" become negative,
# "Neither2" and everything else stay positive
data_summary <- data_summary %>%
  filter(stereotypes != "Neither") %>%
  bind_rows(neither_split) %>%
  arrange(attribute, stereotypes) %>%
  group_by(attribute) %>%
  mutate(cumulative = case_when(
    stereotypes == "Labour" ~ -Percentage,
    stereotypes == "Neither1" ~ -Percentage,
    stereotypes == "Neither2" ~ Percentage,
    TRUE ~ (Percentage)
  ))

# Custom colors
custom_colors <- c(
  "Labour"   = "#4D4D4D",   # Medium-dark gray
  "Neither1" = "#CCCCCC",   # Light gray
  "Neither2" = "#CCCCCC",   # Light gray
  "Tories"   = "#FFFFFF"    # White
)

# Plot: negative and non-negative values are drawn as two separate bar
# layers; the legend shows a single "Neither" entry (the "Neither2" key)
stereotypes_plot <- ggplot(data_summary, aes(x = attribute, y = cumulative, 
                                             fill = factor(stereotypes, levels = c("Labour", "Neither1", "Neither2", "Tories")))) +
  geom_bar(data = subset(data_summary, cumulative < 0), stat = "identity", position = "stack", width = 0.6, color="black") +
  geom_bar(data = subset(data_summary, cumulative >= 0), stat = "identity", position = position_stack(reverse = TRUE), width = 0.6, color="black") +
  scale_fill_manual(values = custom_colors,
                    name = "",
                    labels = c("Labour Party", "Neither", "Conservative Party"),
                    breaks = c("Labour", "Neither2", "Tories")) +
  # NOTE(review): x is the discrete `attribute` axis, so xintercept = 0 may
  # fall outside the plotted range. If a divider at 0% was intended,
  # geom_hline(yintercept = 0) would be the equivalent under coord_flip() --
  # confirm before changing, since it would alter the figure.
  geom_vline(xintercept = 0, color = "white", size = 0.5) +
  coord_flip(ylim = c(-100, 100)) +
  labs(y = "Percentage (%)", x = "Attributes") +
  theme_minimal() +  # use theme_minimal
  theme(legend.position = "bottom")

# Save the plot
ggsave("FIG/figure_7.png", plot = stereotypes_plot, width = 8, height = 6, units = "in")

###### Figure 8 ######

# Load data
tory_stereo <- read.table("DATA/for_R/tory_stereo.txt")
labour_stereo <- read.table("DATA/for_R/labour_stereo.txt")

# Clean data: drop the rows with a zero estimate
tory_stereo <- tory_stereo[tory_stereo$Est. != 0, ]
labour_stereo <- labour_stereo[labour_stereo$Est. != 0, ]

# Rows to plot and, in the same order, the label each one gets
rows_to_keep <- c("cj_race:White", "cj_diet:Veg", "cj_ideology:Progressive", "cj_edu:Degree")
stereo_labels <- c("White out-partisan", "Vegetarian out-partisan", "Progressive out-partisan", "Out-partisan with degree")

# Stop early if a requested row is missing rather than plotting a partial set
stopifnot(
  all(rows_to_keep %in% rownames(tory_stereo)),
  all(rows_to_keep %in% rownames(labour_stereo))
)
tory_stereo <- tory_stereo[rownames(tory_stereo) %in% rows_to_keep, ]
labour_stereo <- labour_stereo[rownames(labour_stereo) %in% rows_to_keep, ]

# Attach each label through its row name. The filter above keeps the rows in
# file order, so assigning the labels by position would put them on the wrong
# estimates whenever the file order differs from rows_to_keep.
tory_stereo$Attribute <- stereo_labels[match(rownames(tory_stereo), rows_to_keep)]
labour_stereo$Attribute <- stereo_labels[match(rownames(labour_stereo), rows_to_keep)]

tory_stereo$Group <- "Tory"
labour_stereo$Group <- "Labour"

# Tory rows first, then Labour rows
combined_df_party <- rbind(tory_stereo, labour_stereo)

# as.character() first so that factor columns convert by value, not by level
# code
combined_df_party$Est. <- as.numeric(as.character(combined_df_party$Est.))
combined_df_party$SE <- as.numeric(as.character(combined_df_party$SE))
combined_df_party$LCI <- as.numeric(as.character(combined_df_party$LCI))
combined_df_party$UCI <- as.numeric(as.character(combined_df_party$UCI))

# Explicit level order for the axis
combined_df_party$Attribute <- factor(
  combined_df_party$Attribute,
  levels = c( "Vegetarian out-partisan", "Out-partisan with degree",
            "White out-partisan",  "Progressive out-partisan"))

# Plot: both parties in black, told apart by point shape
# (19 for Tory, 1 for Labour)

labour_tory <- ggplot(combined_df_party, aes(x = Attribute, y = Est., ymin = LCI, ymax = UCI)) +
  geom_pointrange(aes(color = Group, shape = Group), position = position_dodge(width = 0), na.rm = TRUE) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c(Tory = "black", Labour = "black")) +
  scale_shape_manual(values = c(Tory = 19, Labour = 1)) +  coord_flip() +
  labs(x = "Attribute", y = "AMCE", color = "Party", shape = "Party") +
  theme_minimal() +
  theme(
    axis.text.y = element_text(face = "bold", size=12),
    panel.spacing = unit(2, "lines"),              # Increase spacing between facets
    strip.background = element_rect(fill = "grey90", colour = "black", size = 1), # Stronger border and shading for facet labels
    strip.text = element_text(face = "bold"),       # Bold facet labels
    legend.position = "bottom"                      # Place legend at the bottom
  )

# Save plot
ggsave("FIG/figure_8.png", plot = labour_tory, dpi = 300)

###### Table A10 ######

# Reuse the Figure 8 data (Tory rows first, then Labour rows), keep the table
# columns and round the numeric ones to three decimals
combined_df_party <- combined_df_party[, c("Attribute", "Est.", "SE", "LCI", "UCI")]
combined_df_party[, c("SE", "Est.", "LCI", "UCI")] <- round(combined_df_party[, c("SE", "Est.", "LCI", "UCI")], 3)

# Add a header row and two count rows per sample. Each .before index refers
# to the frame as it stands after the previous insertion, so the order of
# these calls matters. The Tory counts are read from row "r14" of
# `observations` and the Labour counts from row "r13".
# NOTE(review): with four rows per sample the frame appears to have 12 and
# then 13 rows when the last two calls run, so `.before = 14` presumably
# appends at the end (observations, then respondents) -- confirm against
# tibble::add_row().
combined_df_party <- combined_df_party %>%
  add_row(Attribute = "Tory Sample", .before = 1) %>%
  add_row(Attribute = "Number of Respondents", Est. = observations["r14", "c2"], .before = 6) %>%
  add_row(Attribute = "Number of Observations", Est. = observations["r14", "c1"], .before = 6) %>%
  add_row(Attribute = "Labour Sample", .before = 8) %>%
  add_row(Attribute = "Number of Observations", Est. = observations["r13", "c1"], .before = 14) %>% 
  add_row(Attribute = "Number of Respondents", Est. = observations["r13", "c2"], .before = 14) 

# Export to excel
addWorksheet(wb, "Table A10")
writeData(wb, "Table A10", combined_df_party)

###### Figures 5 & 6 ######

# Read the three input tables (fill = TRUE lets read.table() accept lines
# with fewer fields than the rest)
tolerance <- read.table(file = "DATA/for_R/tolerance.txt", fill = TRUE)
beauty <- read.table(file = "DATA/for_R/beauty.txt", fill = TRUE)
height <- read.table(file = "DATA/for_R/height.txt", fill = TRUE)

# Datasets and their line labels, kept in the same order:
# 1 = tolerance, 2 = beauty, 3 = height
margins_list <- list(tolerance, beauty, height)

labels_list <- list(
  c("Intolerant", "Tolerant"),
  c("Unattractive", "Attractive"),
  c("Short", "Tall")
)

# Draw one two-point line chart on the active graphics device.
#
# Args:
#   data: table with columns V2 to V5; only the second row is used. V2 and V4
#         give the dashed line at x = 0 and x = 1, V3 and V5 the solid line.
#   labels: two strings, the label of the dashed line and of the solid line.
#   plot_title: title shown above the panel.
plot_data <- function(data, labels, plot_title) {
  positions <- c(0, 1) # For "Out-party" and "In-party"
  dashed_series <- c(data$V2[2], data$V4[2])
  solid_series <- c(data$V3[2], data$V5[2])

  # Set up the panel with the dashed series; the x-axis is drawn manually at
  # the end
  plot(
    positions, dashed_series,
    type = "b", pch = 19, lty = "dashed",
    ylim = c(0.2, 0.8),
    xlab = "", ylab = "Pr(Choice)",
    main = plot_title, cex.main = 0.8,
    xaxt = "n"
  )

  # Redraw the dashed series in grey, then add the solid series in black
  lines(positions, dashed_series, type = "b", pch = 19, col = "grey", lty = "dashed")
  lines(positions, solid_series, type = "b", pch = 19, col = "black")

  # Label each line near its midpoint: just below the dashed line, just
  # above the solid one
  text(0.5, (dashed_series[1] + dashed_series[2]) / 2 - 0.02, labels[1],
       pos = 4, offset = 0.5, cex = 0.8)
  text(0.5, (solid_series[1] + solid_series[2]) / 2 + 0.02, labels[2],
       pos = 2, offset = 0.5, cex = 0.8)

  axis(1, at = c(0, 1), labels = c("Out-party", "In-party"))
}

# Save height and beauty combined: open a PNG device, split it into one row
# of two panels, draw dataset 2 (beauty) and dataset 3 (height), then close
# the device so the file is written
png(filename = "FIG/figure_6.png", width=1200, height=800, res=200)
par(mfrow=c(1,2))
plot_data(margins_list[[2]], labels_list[[2]], "Beauty")
plot_data(margins_list[[3]], labels_list[[3]], "Height")
dev.off()

# Save the tolerance plot (dataset 1) on its own device, with an empty title
png(filename = "FIG/figure_5.png", width=1000, height=900, res=200)
plot_data(margins_list[[1]], labels_list[[1]], "")
dev.off()

###### Save workbook ######
# Write every sheet added to `wb` above back to the workbook file, replacing
# the version on disk
saveWorkbook(wb, "TAB/tables.xlsx", overwrite = TRUE)

